County Results
theme_c <- function(...){
# font <- "Helvetica" #assign font family up front
theme_bw() %+replace% #replace elements we want to change
theme(
#text elements
plot.title = element_text( #title
size = 16, #set font size
face = 'bold', #bold typeface
hjust = .5,
vjust = 3),
plot.subtitle = element_text( #subtitle
size = 12,
hjust = .5,
face = 'italic',
vjust = 3), #font size
axis.title = element_text( #axis titles
size = 12), #font size
axis.text = element_text( #axis text
size = 9),
legend.text = element_text(size = 12),
# t, r, b, l
plot.margin = unit(c(1,.5,.5,.5), "cm"),
legend.position = "right",
strip.text.x = element_text(size = 11, face = "bold", color="white"),
strip.text.y = element_text(size = 11, face = "bold", color="white",angle=270),
strip.background = element_rect(fill = "#3E3D3D")
) %+replace%
theme(...)
}targets_dir <- here("_targets_6_19")
county <- tar_read(ma_v1,
store =targets_dir) %>%
mutate(version = "v1") %>%
bind_rows(
tar_read(ma_v2,
store =targets_dir)
) %>%
bind_rows(
tar_read(ma_v3,
store =targets_dir)
) %>%
bind_rows(
tar_read(ma_v4,
store =targets_dir)
# )%>%
# bind_rows(
# tar_read(ma_v5,
# store =targets_dir)
# )%>%
# bind_rows(
# tar_read(ma_v6,
# store =targets_dir)
)
waste <- tar_read(waste, store =targets_dir)
covidestim <- tar_read(covidestim_biweekly_county,
store =targets_dir)
dates <- readRDS(here("data/data_raw/date_to_biweek.RDS"))wjoined <- county %>%
# set Nantucket, Duke fips to Nantucket since we have wastewater data
# for Nantucket
mutate(fips = ifelse(grepl("25019", fips), "25019", fips)) %>%
inner_join(waste) %>%
left_join(covidestim, relationship = "many-to-many") %>%
group_by(biweek) %>%
mutate(mindate = min(date),
maxdate = max(date)) %>%
ungroup()
counties_with_waste <- wjoined %>%
group_by(fips) %>%
mutate(obs_w_notna = sum(!is.na(mean_conc))) %>%
filter(obs_w_notna != 0) %>%
pull(fips) %>%
unique()
versions <- tibble(
version = c("v1", "v2", "v3", "v4", "v5", "v6"),
vlabel = factor(
c( "$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value",
"$P(S_1|untested)$ Centered at Emp. Value",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values",
"$\\beta$ Centered at Emp. Value (Spline Smoothed)",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values ($\\beta$ Spline Smoothed)"
),
levels = c(
"$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value",
"$P(S_1|untested)$ Centered at Emp. Value",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values",
"$\\beta$ Centered at Emp. Value (Spline Smoothed)",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values ($\\beta$ Spline Smoothed)"
) )
)
wjoined <- wjoined %>%
left_join(versions)
w_data <- wjoined %>%
select(-date) %>%
distinct() %>%
filter(fips %in% counties_with_waste) %>%
filter(!is.na(mean_conc)) %>%
group_by(fips, version) %>%
mutate(mean_conc_std = mean_conc/ max(mean_conc),
exp_cases_median_std = exp_cases_median / max(exp_cases_median),
exp_cases_lb_std = exp_cases_lb /max(exp_cases_median),
exp_cases_ub_std = exp_cases_ub / max(exp_cases_median),
infections_std = infections/max(infections),
positive_std = positive/max(positive)) %>%
ungroup()Correlations
Normalization Strategy 1: By Values at Biweek Where Wastewater Concentration is Maximized
w_data <- wjoined %>%
select(-date) %>%
distinct() %>%
filter(fips %in% counties_with_waste) %>%
filter(!is.na(mean_conc)) %>%
group_by(fips, version) %>%
mutate(mean_conc_std = mean_conc/ max(mean_conc),
max_conc = max(mean_conc),
exp_cases_median_std = exp_cases_median / exp_cases_median[which.max(mean_conc)],
exp_cases_lb_std = exp_cases_lb /exp_cases_median[which.max(mean_conc)],
exp_cases_ub_std = exp_cases_ub / exp_cases_median[which.max(mean_conc)],
infections_std = infections/ infections[which.max(mean_conc)],
positive_std = positive/max(positive)) %>%
ungroup()PB Counts and Wastewater
# correlations between PB counts and wastewater concentrations
correlations <- w_data %>%
group_by(version) %>%
summarize(Correlation = cor(exp_cases_median_std,mean_conc_std),
vlabel =unique(vlabel)) %>%
mutate(x=1.8,
y=.1)
w_data %>%
group_by(version) %>%
ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = mean_conc_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= correlations ) +
facet_wrap(~vlabel,
ncol=2,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized)",
y = "Mean Wastewater Concentration\n(Standardized)",
title = "Correlations Between Probabilistic Bias Counts\nand Mean Wastewater Concentration")plt_wastewater_pb <- w_data %>%
group_by(version) %>%
ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = mean_conc_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= correlations ) +
facet_wrap(~vlabel,
ncol=1,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized)",
y = "Mean Wastewater Concentration\n(Standardized)",
title = "")PB Counts and Covidestim
# correlations between PB counts and Covidestim estimates
covidestim_correlations <- w_data %>%
filter(!is.na(infections_std)) %>%
group_by(vlabel) %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(exp_cases_median_std,infections_std,
use = "complete.obs")) %>%
mutate(x=1.8,
y=.1)
w_data %>%
ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = infections_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
facet_wrap(~vlabel,
ncol=2,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized)",
y = "Covidestim Estimates\n(Standardized)",
title = "Correlations Between Probabilistic Bias Counts\nand Covidestim Estimates")plt_covidestim_pb <- w_data %>%
ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = infections_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
facet_wrap(~vlabel,
ncol=1,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized)",
y = "Covidestim Estimates\n(Standardized)",
title = "")Combined Figure
cowplot::plot_grid(plt_wastewater_pb, plt_covidestim_pb, ncol=2)Covidestim and Wastewater
covidestim_correlations <- w_data %>%
filter(version=="v1") %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(mean_conc_std,infections_std,
use = "complete.obs")) %>%
mutate(x=1,
y=.1)
w_data %>%
filter(version=="v1") %>%
ggplot(aes(x=infections_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
theme_c() +
labs(x = "Covidestim Estimates\n(Standardized)",
y = "Wastewater Concentrations\n(Standardized)",
title = "Correlations Between Probabilistic Bias Counts\nand Covidestim Estimates")pb_correlations <- w_data %>%
filter(version=="v1") %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(positive_std,mean_conc_std,
use = "complete.obs")) %>%
mutate(x=1.8,
y=.1) Compare to Wastewater Over Time
library(scales)
pal <- c("#10BAC5", "#1B10C5", "#EFB719", "#900C3F")
names(pal) <- c(#"$observed$",
'$P(S_1|untested)$ Centered at Emp. Value',
'$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values',
"$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value"
)
waste_df <- w_data %>%
select(biweek, mean_conc_std, name) %>%
distinct() %>%
left_join(dates,relationship = "many-to-many" ) %>%
group_by(name, biweek) %>%
# center date within 2-week interval
summarize(date = min(date) + days(7),
mean_conc_std=unique(mean_conc_std)) %>%
mutate(name = gsub("County, MA", "", name))
w_data%>%
mutate(name = gsub("County, MA", "", name)) %>%
left_join(dates,relationship = "many-to-many" ) %>%
filter(version %in% c("v1","v3")) %>%
filter(biweek>=6) %>%
group_by(biweek) %>%
mutate(mindate = min(date),
maxdate=max(date)) %>%
ungroup() %>%
# filter(fips == county_fips & date >= begin_date & date <= end_date) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb_std,
ymax = exp_cases_ub_std,
fill = vlabel),
alpha = .5) +
geom_line(data = waste_df,
aes(x = date, y =mean_conc_std,
color = 'Wastewater Effective Concentrations'),
# color = "#DB4048",
size = 1.1) +
geom_linerange(aes(xmin = mindate,
xmax = maxdate,
y = infections_std,
color = 'Covidestim Infection Estimates')) +
geom_point(data = waste_df,
aes(x = date, y =mean_conc_std ),
color = "#DB4048",
alpha = .5,
size = 1.2) +
facet_grid(name~vlabel,
labeller=labeller(vlabel =as_labeller(
TeX, default=label_parsed))) +
scale_fill_manual(values = pal, labels = c('','','',''),
name = "Probabilistic Bias Intervals") +
theme_bw() +
theme_c() +
labs(y = "Value (Standardized)",
title = "",
x= "") +
scale_x_date(date_labels = "%b %Y") +
scale_color_manual(
name = '',
values = c(
'Covidestim Infection Estimates' = 'darkblue',
'Wastewater Effective Concentrations' = '#DB4048')) +
guides(color = guide_legend(override.aes = list(size = 12,
linewidth=3)),
fill = guide_legend(override.aes =list(size = 6)))Normalization Strategy 2: By Maximum (Separately)
For each county, normalize the wastewater concentration by the maximum concentration for that county and the probabilistic bias counts by the maximum median estimated counts for that county (2.5th percentile and 97.5th percentile also standardized by the maximum median estimated counts for that county).
PB Counts and Wastewater
# correlations between PB counts and wastewater concentrations
correlations <- w_data %>%
group_by(version) %>%
summarize(Correlation = cor(exp_cases_median_std,mean_conc_std),
vlabel =unique(vlabel)) %>%
mutate(x=1.8,
y=.1)
w_data %>%
group_by(version) %>%
ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = mean_conc_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= correlations ) +
facet_wrap(~vlabel,
ncol=2,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized)",
y = "Mean Wastewater Concentration\n(Standardized)",
title = "Correlations Between Probabilistic Bias Counts\nand Mean Wastewater Concentration")plt_wastewater_pb <- w_data %>%
group_by(version) %>%
ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = mean_conc_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= correlations ) +
facet_wrap(~vlabel,
ncol=1,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized)",
y = "Mean Wastewater Concentration\n(Standardized)",
title = "")PB Counts and Covidestim
# correlations between PB counts and Covidestim estimates
covidestim_correlations <- w_data %>%
filter(!is.na(infections_std)) %>%
group_by(vlabel) %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(exp_cases_median_std,infections_std,
use = "complete.obs")) %>%
mutate(x=1.8,
y=.1)
w_data %>%
ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = infections_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
facet_wrap(~vlabel,
ncol=2,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized)",
y = "Covidestim Estimates\n(Standardized)",
title = "Correlations Between Probabilistic Bias Counts\nand Covidestim Estimates")plt_covidestim_pb <- w_data %>%
ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
geom_point(alpha=.5) +
geom_linerange(aes(xmin = exp_cases_lb_std,
xmax=exp_cases_ub_std,
y = infections_std),
alpha = .3) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
facet_wrap(~vlabel,
ncol=1,
labeller= as_labeller(TeX,
default = label_parsed)) +
theme_c() +
labs(x = "Probabilistic Bias Counts\n(Standardized)",
y = "Covidestim Estimates\n(Standardized)",
title = "")Combined Figure
cowplot::plot_grid(plt_wastewater_pb, plt_covidestim_pb, ncol=2)Covidestim and Wastewater
covidestim_correlations <- w_data %>%
filter(version=="v1") %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(mean_conc_std,infections_std,
use = "complete.obs")) %>%
mutate(x=1,
y=.1)
w_data %>%
filter(version=="v1") %>%
ggplot(aes(x=infections_std, y = mean_conc_std)) +
geom_point(alpha=.5) +
geom_text(
aes(x=x,y=y, label=paste0("Correlation: ",
round(Correlation,3))),
data= covidestim_correlations ) +
theme_c() +
labs(x = "Covidestim Estimates\n(Standardized)",
y = "Wastewater Concentrations\n(Standardized)",
title = "Correlations Between Probabilistic Bias Counts\nand Covidestim Estimates")pb_correlations <- w_data %>%
filter(version=="v1") %>%
# missing values are handled by casewise deletion
summarize(Correlation = cor(positive_std,mean_conc_std,
use = "complete.obs")) %>%
mutate(x=1,
y=.1)